rm(list = ls())

load("new.RData")

save(list = ls(all = TRUE), file= "new.RData")

install.packages("")

pacman::p_load(dplyr, tidyr, data.table, stringdist, lubridate,
  readxl, writexl, foreign, ggplot2, scales, gridExtra,
  RCzechia, car, pROC, broom, margins, effects, sjPlot, ggeffects, stargazer, igraph)

# 2022 data input
kvrzcoco_22 <- read_excel("data/KV2022reg20241208_xlsx/kvrzcoco.xlsx")
kvrzcoco_22 <- kvrzcoco_22[kvrzcoco_22$OBVODY<=1,]
kvrzcoco_22 <- kvrzcoco_22[!duplicated(kvrzcoco_22[, c("DATUMVOLEB", "KODZASTUP")]),]
kvrzcoco_22$TYPDUVODU[kvrzcoco_22$DATUMVOLEB==20220923] <- 0
kvrk_22 <- read_excel("data/KV2022reg20241208_xlsx/kvrk.xlsx")
kvt_22 <- read_excel("data/KV2022_data_20241208_xlsx/kvt3.xlsx")
kvt_22 <- kvt_22 %>% # deletion of initial results changed by courts
  group_by(KODZASTUP) %>%
  filter(!(any(DATUMVOLEB == 20221115) & DATUMVOLEB == 20220923)) %>%
  ungroup()
kvrk_22 <- kvrk_22 %>%
  group_by(KODZASTUP) %>%
  filter(!(any(DATUMVOLEB == 20221115) & DATUMVOLEB == 20220923)) %>%
  ungroup()
kvrzcoco_22 <- kvrzcoco_22 %>%
  group_by(KODZASTUP) %>%
  filter(!(any(DATUMVOLEB == 20221115) & DATUMVOLEB == 20220923)) %>%
  ungroup()
kvrzcoco_22$TYPDUVODU <- ifelse(kvrzcoco_22$DATUMVOLEB==20221115,0,kvrzcoco_22$TYPDUVODU)

# 2018 data input
kvrzcoco_18 <- read_excel("data/KV2018_reg_20230224_xlsx/kvrzcoco.xlsx")
kvrzcoco_18 <- kvrzcoco_18[kvrzcoco_18$OBVODY<=1,]
kvrzcoco_18 <- kvrzcoco_18[!duplicated(kvrzcoco_18[, c("DATUMVOLEB", "KODZASTUP")]),]
kvrzcoco_18$TYPDUVODU[kvrzcoco_18$DATUMVOLEB==20181005] <- 0
kvrk_18 <- read_excel("data/KV2018_reg_20230224_xlsx/kvrk.xlsx")
kvt_18 <- read_excel("data/KV2018_data_20230224_xlsx/kvt3.xlsx")
kvt_18 <- kvt_18 %>% # deletion of initial results changed by courts
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20181130) & DATUMVOLEB == 20181005)|
           (any(DATUMVOLEB == 20190930) & DATUMVOLEB == 20190914))) %>%
  ungroup()
kvrk_18 <- kvrk_18 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20181130) & DATUMVOLEB == 20181005)|
             (any(DATUMVOLEB == 20190930) & DATUMVOLEB == 20190914))) %>%
  ungroup()
kvrzcoco_18 <- kvrzcoco_18 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20181130) & DATUMVOLEB == 20181005)|
             (any(DATUMVOLEB == 20190930) & DATUMVOLEB == 20190914))) %>%
  ungroup()
kvrzcoco_18$TYPDUVODU <- ifelse(kvrzcoco_18$DATUMVOLEB==20181130,0,kvrzcoco_18$TYPDUVODU)

# 2014 data input
kvrzcoco_14 <- read_excel("data/KV2014_reg_20230224_xlsx/kvrzcoco.xlsx")
kvrzcoco_14 <- kvrzcoco_14[kvrzcoco_14$OBVODY<=1,]
kvrzcoco_14 <- kvrzcoco_14[!duplicated(kvrzcoco_14[, c("DATUMVOLEB", "KODZASTUP")]),]
kvrk_14 <- read_excel("data/KV2014_reg_20230224_xlsx/kvrk.xlsx")
kvt_14 <- read_excel("data/KV2014_data_20230224_xlsx/kvt3.xlsx")
kvt_14 <- kvt_14 %>% # deletion of initial results changed by courts
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20141121) & DATUMVOLEB == 20141010)|
             (any(DATUMVOLEB == 20161201) & DATUMVOLEB == 20161105))) %>%
  ungroup()
kvrk_14 <- kvrk_14 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20141121) & DATUMVOLEB == 20141010)|
             (any(DATUMVOLEB == 20161201) & DATUMVOLEB == 20161105))) %>%
  ungroup()
kvrzcoco_14 <- kvrzcoco_14 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20141121) & DATUMVOLEB == 20141010)|
             (any(DATUMVOLEB == 20161201) & DATUMVOLEB == 20161105))) %>%
  ungroup()
kvrzcoco_14$TYPDUVODU <- ifelse(kvrzcoco_14$DATUMVOLEB==20141121,0,kvrzcoco_14$TYPDUVODU)

# 2010 data input
kvrzcoco_10 <- read_excel("data/KV2010_reg_20230224_xlsx/kvrzcoco.xlsx")
kvrzcoco_10 <- kvrzcoco_10[kvrzcoco_10$OBVODY<=1,]
kvrzcoco_10 <- kvrzcoco_10[!duplicated(kvrzcoco_10[, c("DATUMVOLEB", "KODZASTUP")]),]
kvrk_10 <- read_excel("data/KV2010_reg_20230224_xlsx/kvrk.xlsx")
kvt_10 <- read_excel("data/KV2010_data_20230224_xlsx/kvt3.xlsx")
kvrzcoco_10$TYPDUVODU[kvrzcoco_10$DATUMVOLEB==20110205 & 
                        kvrzcoco_10$NAZEVZAST%in%c("Sobíšky","Libenice","Zlosyň","Zhoř","Nezdice","Jezernice","Prameny",
                                                   "Bohatice","Myslinka","Podbořanský Rohozec","Dražovice","Kovanec",
                                                   "Choteč","Borovnice","Chleny","Brumov","Horní Kounice","Řitonice")] <- 2 # data repair
kvt_10 <- kvt_10 %>% # deletion of initial results changed by courts
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20101119) & DATUMVOLEB == 20101015)|
             (any(DATUMVOLEB == 20121112) & DATUMVOLEB == 20121013))) %>%
  ungroup()
kvrk_10 <- kvrk_10 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20101119) & DATUMVOLEB == 20101015)|
             (any(DATUMVOLEB == 20121112) & DATUMVOLEB == 20121013))) %>%
  ungroup()
kvrzcoco_10 <- kvrzcoco_10 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20101119) & DATUMVOLEB == 20101015)|
             (any(DATUMVOLEB == 20121112) & DATUMVOLEB == 20121013))) %>%
  ungroup()
kvrzcoco_10$TYPDUVODU <- ifelse(kvrzcoco_10$DATUMVOLEB==20101119,0,kvrzcoco_10$TYPDUVODU)

# 2006 data input
kvrzcoco_06 <- read_excel("data/KV2006_reg_20230224_xlsx/kvrzcoco.xlsx")
kvrzcoco_06 <- kvrzcoco_06[kvrzcoco_06$OBVODY<=1,]
kvrzcoco_06 <- kvrzcoco_06[!duplicated(kvrzcoco_06[, c("DATUMVOLEB", "KODZASTUP")]),]
kvrk_06 <- read_excel("data/KV2006_reg_20230224_xlsx/kvrk.xlsx")
kvt_06 <- read_excel("data/KV2006_data_20230224_xlsx/kvt3.xlsx")
kvt_06 <- kvt_06 %>% # deletion of initial results changed by courts
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20061127) & DATUMVOLEB == 20061020)|
             (any(DATUMVOLEB == 20070125) & DATUMVOLEB == 20061020))) %>%
  ungroup()
kvrk_06 <- kvrk_06 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20061127) & DATUMVOLEB == 20061020)|
             (any(DATUMVOLEB == 20070125) & DATUMVOLEB == 20061020))) %>%
  ungroup()
kvrzcoco_06 <- kvrzcoco_06 %>%
  group_by(KODZASTUP) %>%
  filter(!((any(DATUMVOLEB == 20061127) & DATUMVOLEB == 20061020)|
             (any(DATUMVOLEB == 20070125) & DATUMVOLEB == 20061020))) %>%
  ungroup()
kvrzcoco_06$TYPDUVODU <- ifelse(kvrzcoco_06$DATUMVOLEB==20061127,0,kvrzcoco_06$TYPDUVODU)
kvrzcoco_06$TYPDUVODU <- ifelse(kvrzcoco_06$DATUMVOLEB==20070125,0,kvrzcoco_06$TYPDUVODU)
kvrzcoco_06$TYPDUVODU[kvrzcoco_06$KODZASTUP==559121 & kvrzcoco_06$DATUMVOLEB==20100417] <- 6

# 2002 data input
kvrzcoco_02 <- read_excel("data/KV2002_reg_20230224_xlsx/kvrzcoco.xlsx")
kvrzcoco_02 <- kvrzcoco_02[kvrzcoco_02$OBVODY<=1,]
kvrzcoco_02 <- kvrzcoco_02[!duplicated(kvrzcoco_02[, c("DATUMVOLEB", "KODZASTUP")]),]
kvrk_02 <- read_excel("data/KV2002_reg_20230224_xlsx/kvrk.xlsx")
kvt_02 <- read_excel("data/KV2002_data_20230224_xlsx/kvt3.xlsx")

test <- kvt_02[,c("KODZASTUP","VOL_SEZNAM")] # analyze the number of inhabitants based on the number of voters
test <- test %>%
  group_by(KODZASTUP) %>%
  summarise(VOL_SEZNAM = sum(VOL_SEZNAM, na.rm = TRUE))
test <- merge(test,kvrzcoco_02[,c("KODZASTUP","POCOBYV")], by = "KODZASTUP")
test$perc <- test$POCOBYV/test$VOL_SEZNAM
mean(test$perc[test$perc>1]) # inhabitants = voters*1.25

# 1998 data input (not used in the analysis)
kvrk_98 <- read_excel("data/ZO1998_řádné/kvrk.xlsx") # candidates
kvrk_98$DATUMVOLEB <- 19981113
kvrk_98_extra <- read_excel("data/ZO99-02-mimořádné/kvrk.xlsx")
colnames(kvrk_98_extra)[colnames(kvrk_98_extra) == "DATUM VOLEB"] <- "DATUMVOLEB"
kvrk_98_extra$DATUMVOLEB <- as.numeric(paste0(sprintf("%04d", year(kvrk_98_extra$DATUMVOLEB)),sprintf("%02d", month(kvrk_98_extra$DATUMVOLEB)),sprintf("%02d", day(kvrk_98_extra$DATUMVOLEB))))
kvrk_98_extra$TYPDUVODU <- NULL
colnames(kvrk_98_extra)[colnames(kvrk_98_extra) == "HLASY"] <- "POCHLASU"
kvrk_98 <- rbind(kvrk_98,kvrk_98_extra)
rm(kvrk_98_extra)
colnames(kvrk_98)[colnames(kvrk_98) == "TITUL"] <- "TITULPRED"
kvrk_98$TITULZA <- kvrk_98$TITULPRED
kvrk_98$KODZASTUP_NAZEV <- NULL
kvrk_98$TYPZASTUP <- NULL
colnames(kvrk_98)[colnames(kvrk_98) == "VSTRANA"] <- "OSTRANA"
kvrk_98$MANDAT <- ifelse(kvrk_98$MANDAT==1,"A","N")
kvrk_98$PLATNOST <- ifelse(kvrk_98$PLATNOST==0,"A","N")
kvrk_98 <- kvrk_98 %>%
  group_by(DATUMVOLEB, KODZASTUP, OSTRANA) %>%
  mutate(
    party_won_mandate = any(MANDAT == "A"),
    PORADINAHR = ifelse(MANDAT == "N" & party_won_mandate, 1, 0)
  ) %>%
  ungroup()
kvrk_98$party_won_mandate <- NULL
kvrzcoco_98 <- read_excel("data/ZO1998_řádné/kvrzcoco.xlsx") # elections
kvrzcoco_98$DATUMVOLEB <- 19981113
kvrzcoco_98$TYPDUVODU <- 0
kvrzcoco_98_extra <- read_excel("data/ZO99-02-mimořádné/kvrzcoco.xlsx")
colnames(kvrzcoco_98_extra)[colnames(kvrzcoco_98_extra) == "DATUM VOLEB"] <- "DATUMVOLEB"
kvrzcoco_98_extra$DATUMVOLEB <- as.numeric(paste0(sprintf("%04d", year(kvrzcoco_98_extra$DATUMVOLEB)),sprintf("%02d", month(kvrzcoco_98_extra$DATUMVOLEB)),sprintf("%02d", day(kvrzcoco_98_extra$DATUMVOLEB))))
colnames(kvrzcoco_98_extra)[colnames(kvrzcoco_98_extra) == "KODZASTUP_NAZEV"] <- "NAZEVZAST"
kvrzcoco_98_extra$POCOBYV <- kvrzcoco_98_extra$VOLICI*1.25
candidate_counts <- kvrk_98 %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(POCET_VS = n_distinct(OSTRANA), .groups = "drop")
kvrzcoco_98_extra <- kvrzcoco_98_extra %>%
  left_join(candidate_counts, by = c("KODZASTUP", "DATUMVOLEB"))
rm(candidate_counts)
kvrzcoco_98_extra$POCOKRSKU <- NULL
kvrzcoco_98$MINOKRSEK <- NULL
kvrzcoco_98$MAXOKRSEK <- NULL
kvrzcoco_98 <- rbind(kvrzcoco_98,kvrzcoco_98_extra)
rm(kvrzcoco_98_extra)
kvt_98 <- kvrzcoco_98[,c("DATUMVOLEB","KODZASTUP","VOLICI","VYDOB","ODOB","HLASYCEL")] # turnout
kvt_98$turnout <- kvt_98$VYDOB/kvt_98$VOLICI*100
kvrzcoco_98$STAV_OBCE <- 0 
kvrzcoco_98$OBEC <- kvrzcoco_98$KODZASTUP
kvrzcoco_98$NAZEVOBCE <- kvrzcoco_98$NAZEVZAST
kvrzcoco_98 <- kvrzcoco_98 %>%
  left_join(select(kvrzcoco_02, KODZASTUP, KRAJ), by = "KODZASTUP")
kvrzcoco_98 <- kvrzcoco_98 %>% distinct()
kvrzcoco_98$KRAJ[kvrzcoco_98$KODZASTUP==598534] <- 999
kvrzcoco_98$KRAJ[kvrzcoco_98$KODZASTUP==542245] <- 999
kvrzcoco_98$KRAJ[kvrzcoco_98$KODZASTUP==542261] <- 999
kvrzcoco_98$KRAJ[kvrzcoco_98$KODZASTUP==542300] <- 999
kvrzcoco_98$KRAJ[kvrzcoco_98$KODZASTUP==549819] <- 999
kvrzcoco_98$KRAJ[kvrzcoco_98$KODZASTUP==569763] <- 999

# candidates combined
kvrk <- rbind(kvrk_22,kvrk_18,kvrk_14,kvrk_10,kvrk_06,kvrk_02)
kvrk$PRESKOCENI <- NULL
kvrk$POCPROCVSE <- NULL
kvrk$PORADIMAND <- NULL
kvrk <- rbind(kvrk,kvrk_98)
kvrk <- kvrk[kvrk$PLATNOST=="A",] # only valid canidates
kvrk$university <-  ifelse(is.na(kvrk$TITULPRED) & is.na(kvrk$TITULZA),0,1) # university-educated candidates

# unique codes of candidates
setDT(kvrk)
kvrk[, ELECTION_YEAR := as.integer(substr(DATUMVOLEB, 1, 4))]
kvrk[, `:=`(
  BIRTH_YEAR = ELECTION_YEAR - VEK,  
  JMENO = tolower(JMENO),            
  PRIJMENI = tolower(PRIJMENI),      
  BLOCK_KEY = paste(JMENO, PRIJMENI, KODZASTUP, sep = "_"),  
  row_id = .I
)]
setkey(kvrk, BLOCK_KEY)
matches <- kvrk[kvrk, on = .(BLOCK_KEY), allow.cartesian = TRUE, nomatch = 0,
                .(id1 = i.row_id, id2 = row_id, 
                  birth_diff = abs(i.BIRTH_YEAR - BIRTH_YEAR))]
matched_pairs <- matches[id1 < id2 & birth_diff <= 1]
g <- graph_from_data_frame(matched_pairs, directed = FALSE)
components <- components(g)
id_dt <- data.table(
  row_id = as.integer(names(components$membership)),
  ID = components$membership
)
kvrk <- merge(kvrk, id_dt, by = "row_id", all.x = TRUE)
next_id <- max(kvrk$ID, na.rm = TRUE) + 1
kvrk[is.na(ID), ID := next_id + .I - 1]
rm(g, id_dt, matched_pairs, matches, components)

# candidates gender
names <- read.csv("data/OpenData-Seznam_jmen_-_20250331.csv")
names <- names[names$DRUH_JMENA=="ZENA",]
kvrk$JMENO <- toupper(kvrk$JMENO)
kvrk$female <- ifelse(kvrk$JMENO %in% names$JMENO, 1, 0)

# turnout combined
kvt <- rbind(kvt_22,kvt_18,kvt_14,kvt_10,kvt_06,kvt_02)
kvt <- kvt %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(
    VOL_SEZNAM = sum(VOL_SEZNAM, na.rm = TRUE),
    VYD_OBALKY = sum(VYD_OBALKY, na.rm = TRUE)
  )
kvt$turnout <- kvt$VYD_OBALKY/kvt$VOL_SEZNAM*100
kvt_98$ODOB <- NULL
kvt_98$HLASYCEL <- NULL
colnames(kvt_98)[colnames(kvt_98) == "VOLICI"] <- "VOL_SEZNAM"
colnames(kvt_98)[colnames(kvt_98) == "VYDOB"] <- "VYD_OBALKY"
kvt <- rbind(kvt,kvt_98)

# final dataset
data <- kvrzcoco_22[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                       "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")]
data <- rbind(data,kvrzcoco_18[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                                  "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")])
data <- rbind(data,kvrzcoco_14[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                                  "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")])
data <- rbind(data,kvrzcoco_10[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                                  "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")])
data <- rbind(data,kvrzcoco_06[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                                  "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")])
data <- rbind(data,kvrzcoco_02[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                                  "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")])
data <- rbind(data,kvrzcoco_98[,c("DATUMVOLEB","OKRES","KRAJ","TYPZASTUP","KODZASTUP","NAZEVZAST",
                                  "OBEC","NAZEVOBCE","MANDATY","POCOBYV","TYPDUVODU","POCET_VS","STAV_OBCE")])

# add turnout 
data <- merge(data,kvt[,c("KODZASTUP","DATUMVOLEB","turnout")], by = c("KODZASTUP","DATUMVOLEB"), all.x = T)

# add turnout evaluation across time
data <- data %>%
  group_by(KODZASTUP) %>%
  mutate(
    turnout_avg = mean(turnout, na.rm = TRUE),
    turnout_time = turnout / turnout_avg
  ) %>%
  ungroup()

# election dates
kvdatumvoleb_02 <- read_excel("data/KV2002_cisel_20230224_xlsx/kvdatumvoleb.xlsx")
kvdatumvoleb_06 <- read_excel("data/KV2006_cisel_20230224_xlsx/kvdatumvoleb.xlsx")
kvdatumvoleb_10 <- read_excel("data/KV2010_cisel_20230224_xlsx/kvdatumvoleb.xlsx")
kvdatumvoleb_14 <- read_excel("data/KV2014_cisel_20230224_xlsx/kvdatumvoleb.xlsx")
kvdatumvoleb_18 <- read_excel("data/KV2018_cisel_20230224_xlsx/kvdatumvoleb.xlsx")
kvdatumvoleb_22 <- read_excel("data/KV2022ciselniky20241208/kvdatumvoleb.xlsx")
data$term <- NA
data$term <- ifelse(data$DATUMVOLEB%in%kvdatumvoleb_02$DATUMVOLEB,2002,data$term)
data$term <- ifelse(data$DATUMVOLEB%in%kvdatumvoleb_06$DATUMVOLEB,2006,data$term)
data$term <- ifelse(data$DATUMVOLEB%in%kvdatumvoleb_10$DATUMVOLEB,2010,data$term)
data$term <- ifelse(data$DATUMVOLEB%in%kvdatumvoleb_14$DATUMVOLEB,2014,data$term)
data$term <- ifelse(data$DATUMVOLEB%in%kvdatumvoleb_18$DATUMVOLEB,2018,data$term)
data$term <- ifelse(data$DATUMVOLEB%in%kvdatumvoleb_22$DATUMVOLEB,2022,data$term)
data$term <- ifelse(is.na(data$term),1998,data$term)

data$date <- as.Date(as.character(data$DATUMVOLEB), format = "%Y%m%d")
data <- data %>%
  mutate(election_year = as.integer(format(date, "%Y")))

# correction of dataset
correct_kodzastup <- data %>%
  group_by(NAZEVZAST, OKRES, KRAJ) %>%
  filter(n() > 1) %>%    
  arrange(date) %>%  
  summarise(correct_KODZASTUP = last(na.omit(KODZASTUP))) %>%
  ungroup()
data <- data %>%
  left_join(correct_kodzastup, by = c("NAZEVZAST", "OKRES", "KRAJ")) %>%
  mutate(KODZASTUP = ifelse(is.na(KODZASTUP) | date == min(date, na.rm = TRUE), correct_KODZASTUP, KODZASTUP)) %>%
  select(-correct_KODZASTUP)
rm(correct_kodzastup)
correct_kodzastup <- data %>%
  group_by(OKRES, KRAJ, OBEC) %>% 
  filter(!is.na(KODZASTUP)) %>%     
  arrange(date) %>%                 
  summarise(correct_KODZASTUP = last(KODZASTUP)) %>%
  ungroup()
data <- data %>%
  left_join(correct_kodzastup, by = c("OKRES", "KRAJ", "OBEC")) %>%
  mutate(KODZASTUP = ifelse(is.na(KODZASTUP) & date == min(date, na.rm = TRUE), correct_KODZASTUP, KODZASTUP)) %>%  
  select(-correct_KODZASTUP)  
rm(correct_kodzastup)
data$KODZASTUP <- ifelse(is.na(data$KODZASTUP),data$OBEC,data$KODZASTUP)
data <- data %>%
  filter(!(data$KODZASTUP==574414 & data$election_year>1998)) # wrong election data of the defunct municipality (Domoradice, d. Vysoké Mýto)
data <- data %>%
  filter(!(data$KODZASTUP==533131 & data$election_year>1998)) # wrong election data of the defunct municipality (Zahořany, d. Beroun)
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==599751] <- 5 # repair null mandates
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==576310] <- 7
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==576140] <- 9
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==566080] <- 6
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==565661] <- 5
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==564664] <- 5
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==564478] <- 7
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==559121] <- 7
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==551627] <- 7
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==539538] <- 5
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==532258] <- 5
data$MANDATY[data$MANDATY==0 & data$KODZASTUP==513512] <- 7

# identify first elections
first_election <- data %>%
  group_by(KODZASTUP) %>%
  summarise(first_election_date = min(date, na.rm = TRUE)) %>%
  ungroup()
data <- data %>%
  left_join(first_election, by = "KODZASTUP")
rm(first_election)

# types of elections
duvod_22 <- read_excel("data/KV2022ciselniky20241208/ctypduvv.xlsx")
table(data$TYPDUVODU)
duvod_22 <- rbind(duvod_22,c(7,"Nová obec","New municipality"))
data$TYPDUVODU <- ifelse(data$TYPDUVODU==1 & data$date==data$first_election_date,7,data$TYPDUVODU)
table(data$TYPDUVODU)

# type of  elections
table(data$TYPDUVODU)
data$new_municipality <- ifelse(data$TYPDUVODU==7,1,0)
data$lack_candidates <- ifelse(data$STAV_OBCE==1,1,0)
data$special_el <- ifelse(data$TYPDUVODU%in%c(1:6),1,0) # special elections
data$new_el <- ifelse(data$TYPDUVODU==1,1,0) # new elections (lack of council members and substitutes)
data$additional_el <- ifelse(data$TYPDUVODU==2,1,0) # additional elections (lack of candidates in the previous election)
data$repeated_el <- ifelse(data$TYPDUVODU%in%c(3,6),1,0) # repreated elections (decided by court)

# next election without enough candidates
data <- data %>%
  arrange(KODZASTUP, date) %>%
  group_by(KODZASTUP) %>% 
  mutate(next_lack_candidates = lead(lack_candidates)) %>%
  ungroup()

# next election because lack of council members and substitutes
data <- data %>%
  arrange(KODZASTUP, date) %>%
  group_by(KODZASTUP) %>% 
  mutate(next_new_el = lead(new_el)) %>%
  ungroup()

# previous lack of candidates
data <- data %>%
  arrange(KODZASTUP, DATUMVOLEB) %>%
  group_by(KODZASTUP) %>%
  mutate(
    previous_lack = lag(cummax(lack_candidates), default = 0)
  ) %>%
  ungroup()

# previous new elections
data <- data %>%
  arrange(KODZASTUP, DATUMVOLEB) %>%
  group_by(KODZASTUP) %>%
  mutate(
    previous_new = lag(cummax(new_el), default = 0)
  ) %>%
  ungroup()

# previous special elections
data <- data %>%
  arrange(KODZASTUP, DATUMVOLEB) %>%
  group_by(KODZASTUP) %>%
  mutate(
    previous_special = lag(cummax(special_el), default = 0)
  ) %>%
  ungroup()

# number of candidates
data <- data %>%
  left_join(
    kvrk %>%
      count(KODZASTUP, DATUMVOLEB, name = "candidates"),
    by = c("KODZASTUP", "DATUMVOLEB")
  ) %>%
  mutate(candidates = replace_na(candidates, 0))
data$candidates_rel <- data$candidates/data$POCOBYV*1000
data$plurality_index <- data$candidates/data$MANDATY
data$plurality_index[is.nan(data$plurality_index)] <- 0

# number of candidate lists
data$POCET_VS_rel <- data$POCET_VS/data$POCOBYV*1000

# age of candidates
avg_age_data <- kvrk %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(candidates_age = mean(VEK, na.rm = TRUE), .groups = "drop")  
data <- data %>%
  left_join(avg_age_data, by = c("KODZASTUP", "DATUMVOLEB"))
rm(avg_age_data)

# education of candidates
education <- kvrk %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(candidates_uni_edu = mean(university, na.rm = TRUE)*100, .groups = "drop")  
data <- data %>%
  left_join(education, by = c("KODZASTUP", "DATUMVOLEB"))
rm(education)
data$log_candidates_uni_edu <- log(data$candidates_uni_edu)

# gender of candidates
gender <- kvrk %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(candidates_female = mean(female, na.rm = TRUE)*100, .groups = "drop")  
data <- data %>%
  left_join(gender, by = c("KODZASTUP", "DATUMVOLEB"))
rm(gender)

# number of councillors
data <- data %>%
  left_join(
    kvrk[kvrk$MANDAT=="A",] %>%
      count(KODZASTUP, DATUMVOLEB, name = "councillors"),
    by = c("KODZASTUP", "DATUMVOLEB")
  ) %>%
  mutate(councillors = replace_na(councillors, 0))
data$councillors_rel <- data$councillors/data$POCOBYV*1000

# share of elected mandates
data$councillors_share <- data$councillors/data$MANDATY*100
data$councillors_share <- ifelse(data$councillors_share>100,100,data$councillors_share)

# number of substitutes
data <- data %>%
  left_join(
    kvrk[kvrk$PORADINAHR>0,] %>%
      count(KODZASTUP, DATUMVOLEB, name = "substitutes"),
    by = c("KODZASTUP", "DATUMVOLEB")
  ) %>%
  mutate(substitutes = replace_na(substitutes, 0))

# minimal number of councillors
data$councillors_min <- round(data$MANDATY/2 + 0.1)
data$councillors_min <- ifelse(data$councillors_min<5,5,data$councillors_min)

# councillors and substitutes above new election
data$above_new_election <- data$councillors+data$substitutes-data$councillors_min
data$above_new_election <- ifelse(data$above_new_election<0,0,data$above_new_election)
data$log_above_new_election <- log(data$above_new_election)
data$log_above_new_election[is.infinite(data$log_above_new_election) & data$log_above_new_election < 0] <- 0

# age of councillors
avg_age_data <- kvrk[kvrk$MANDAT=="A",] %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(councillors_age = mean(VEK, na.rm = TRUE), .groups = "drop")  
data <- data %>%
  left_join(avg_age_data, by = c("KODZASTUP", "DATUMVOLEB"))
rm(avg_age_data)

# university-educated councillors
education <- kvrk[kvrk$MANDAT=="A",] %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(councillors_uni_edu = mean(university, na.rm = TRUE)*100, .groups = "drop")  
data <- data %>%
  left_join(education, by = c("KODZASTUP", "DATUMVOLEB"))
rm(education)
data$log_councillors_uni_edu <- log(data$councillors_uni_edu)

# gender of councillors
gender <- kvrk[kvrk$MANDAT=="A",] %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(councillors_female = mean(female, na.rm = TRUE)*100, .groups = "drop")  
data <- data %>%
  left_join(gender, by = c("KODZASTUP", "DATUMVOLEB"))
rm(gender)

# incumbents
setDT(kvrk)
setorder(kvrk, ID, DATUMVOLEB)
kvrk[, incumbent := ifelse(shift(MANDAT, type = "lag") == "A", 1, 0), by = ID]
kvrk[is.na(incumbent), incumbent := 0]
previous_elections <- kvrk %>%
  select(KODZASTUP, DATUMVOLEB) %>%
  distinct() %>%  
  arrange(KODZASTUP, DATUMVOLEB) %>% 
  group_by(KODZASTUP) %>%
  ungroup()
previous_elections <- previous_elections %>%
  arrange(KODZASTUP, DATUMVOLEB) %>% 
  group_by(KODZASTUP) %>%          
  mutate(previous_election = lag(DATUMVOLEB)) %>%  
  ungroup()
kvrk <- merge(kvrk,previous_elections,by=c("KODZASTUP","DATUMVOLEB"))
library(data.table)
setDT(kvrk)
kvrk[, key := paste(ID, previous_election)]
valid_keys <- kvrk[, paste(ID, DATUMVOLEB)]
kvrk[incumbent == 1 & !(key %in% valid_keys), incumbent := 0]
kvrk[, key := NULL]
incumbents_count <- kvrk[kvrk$MANDAT=="A"] %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(incumbents = sum(incumbent, na.rm = TRUE), .groups = "drop")
data <- data %>%
  left_join(incumbents_count, by = c("KODZASTUP" = "KODZASTUP", "DATUMVOLEB" = "DATUMVOLEB"))
data$incumbents[is.na(data$incumbents)] <- 0
data$incumbents <- data$incumbents/data$MANDATY*100

# incumbents check
candidates <- readRDS("data/candidates_panel.rds")
candidates <- candidates[candidates$election_type=="Municipal" & candidates$candidate_seat==1,]
candidates$code <- paste0(candidates$person_id,"-",candidates$election_year)
numeric_parts <- as.numeric(sub(".*-(\\d+)", "\\1", candidates$code))
target_values <- numeric_parts - 4
target_codes <- paste0(sub("(.*-)(\\d+)", "\\1", candidates$code), target_values)
matches <- target_codes %in% candidates$code
candidates$incumbent <- as.integer(matches)
rm(matches,numeric_parts,target_codes,target_values)
incumbents_count <- candidates %>%
  group_by(municipality_id, election_year) %>%
  summarise(incumbents_check = sum(incumbent, na.rm = TRUE), .groups = "drop")
data <- data %>%
  left_join(incumbents_count, by = c("KODZASTUP" = "municipality_id", "election_year" = "election_year"))
data$incumbents_check[is.na(data$incumbents_check)] <- 0
data$incumbents_check <- data$incumbents_check/data$MANDATY*100

# novices
kvrk <- kvrk %>%
  group_by(ID) %>%
  mutate(novice = if_else(DATUMVOLEB == min(DATUMVOLEB), 1, 0)) %>%
  ungroup()
novices_count <- kvrk %>%
  group_by(KODZASTUP, DATUMVOLEB) %>%
  summarise(novices = sum(novice, na.rm = TRUE), .groups = "drop")
data <- data %>%
  left_join(novices_count, by = c("KODZASTUP" = "KODZASTUP", "DATUMVOLEB" = "DATUMVOLEB"))
data$novices <- data$novices/data$candidates*100

# NA corrections
data <- data %>%
  mutate(
    councillors = ifelse(is.na(turnout), NA, councillors),
    councillors_share = ifelse(is.na(turnout), NA, councillors_share),
    substitutes = ifelse(is.na(turnout), NA, substitutes),
    above_new_election = ifelse(is.na(turnout), NA, above_new_election),
    log_above_new_election = ifelse(is.na(turnout), NA, log_above_new_election),
    councillors_rel = ifelse(is.na(turnout), NA, councillors_rel),
    councillors_age = ifelse(is.na(turnout), NA, councillors_age),
    councillors_uni_edu = ifelse(is.na(turnout), NA, councillors_uni_edu),
    incumbents_check = ifelse(is.na(turnout), NA, councillors_uni_edu),
    incumbents = ifelse(is.na(turnout), NA, incumbents)
  )

# dataset since 2002
data <- data[data$term>=2002,]

# descriptive analysis
descriptive <- data %>%
  mutate(pop_group = if_else(POCOBYV > 2000, "2000_and_over", "under_2000")) %>%
  group_by(TYPZASTUP, pop_group) %>%
  summarise(
    sum_unique_kodzastup = n_distinct(KODZASTUP),
    total_elections = n(),
    lack_candidates = sum(lack_candidates, na.rm = TRUE),
    new_municipality = sum(new_municipality, na.rm = TRUE),
    new_el = sum(new_el, na.rm = TRUE),
    additional_el = sum(additional_el, na.rm = TRUE),
    repeated_el = sum(repeated_el, na.rm = TRUE)) %>%
  ungroup()
write_xlsx(descriptive, "export/summary_data.xlsx")
data$NAZEVZAST[data$POCOBYV>=2000 & data$new_el==1] # towns with new elections
data$NAZEVZAST[data$POCOBYV>=2000 & data$new_municipality==1] # new town
data$NAZEVZAST[data$POCOBYV>=2000 & data$repeated_el==1] # town with repeated election
data$NAZEVZAST[data$TYPZASTUP==2 & data$new_el==1] # municipal districts with new election

# barplot
terms <- aggregate(cbind(new_el, additional_el) ~ term, data = data[data$TYPZASTUP==1 & data$POCOBYV<=2000,], sum, na.rm = TRUE)
terms_matrix <- as.matrix(terms[, -1])
rownames(terms_matrix) <- terms$term
rownames(terms_matrix)[1] <- paste0(rownames(terms_matrix)[1],"-2006")
rownames(terms_matrix)[2] <- paste0(rownames(terms_matrix)[2],"-2010")
rownames(terms_matrix)[3] <- paste0(rownames(terms_matrix)[3],"-2014")
rownames(terms_matrix)[4] <- paste0(rownames(terms_matrix)[4],"-2018")
rownames(terms_matrix)[5] <- paste0(rownames(terms_matrix)[5],"-2022")
rownames(terms_matrix)[6] <- paste0(rownames(terms_matrix)[6],"-2024*")

tiff("export/terms.tiff", width = 10, height = 5, units = "in", res = 300)
bp <- barplot(t(terms_matrix),
  beside = FALSE, col = c("black", "white"), legend.text = c("New Elections", "Additional Elections"),
  args.legend = list(x = "top", horiz = TRUE), las=1,
  main = "", xlab = "Term", ylab = "Elections", ylim = c(0,250))
cumulative_heights <- apply(terms_matrix, 1, cumsum)
cumulative_heights[2,] <- cumulative_heights[2,]
for (i in 1:nrow(terms_matrix)) {
  bottom_position <- c(0, cumulative_heights[-nrow(cumulative_heights), i])
  top_position <- cumulative_heights[, i]
  mid_point <- bottom_position + (top_position - bottom_position) / 2
  text(x = bp[i], y = mid_point, labels = as.character(t(terms_matrix)[, i]),
    col = c("white","black"), cex = 0.8, font = 1)
}
dev.off()

# maps
casti <- casti()
colnames(casti) <- c("KOD_OBEC","NAZ_OBEC","KOD_POU","NAZ_POU","geometry")
obce <- obce_polygony()
obce <- rbind(obce[,c("KOD_OBEC","NAZ_OBEC","KOD_POU","NAZ_POU","geometry")],casti)
republika <- republika()
kraje <- kraje()

# map of lack of candidates
map_lack_candidates <- data[data$lack_candidates==1 & data$TYPZASTUP==1 & data$POCOBYV<=2000,] %>%
  group_by(KODZASTUP) %>%
  summarise(lack_candidates_sum = sum(lack_candidates, na.rm = TRUE))
map_lack_candidates <- merge(obce,map_lack_candidates,by.y="KODZASTUP",by.x="KOD_OBEC")
tiff("export/map_lack_candidates.tiff", width = 10, height = 5, units = "in", res = 300)
ggplot(data = map_lack_candidates) +
  geom_sf(fill = "grey50", color = "black", show.legend = FALSE) +
  geom_sf(data = republika, fill = NA, color = "black", size = 1) +
  geom_sf(data = kraje, fill = NA, color = "black", size = 1) +
  theme_void()
dev.off()

# map of next new elections
map_new_el <- data[data$new_el==1 & data$TYPZASTUP==1 & data$POCOBYV<=2000,] %>%
  group_by(KODZASTUP) %>%
  summarise(new_el_sum = sum(new_el, na.rm = TRUE))
map_new_el <- merge(obce,map_new_el,by.y="KODZASTUP",by.x="KOD_OBEC")
tiff("export/map_new_el.tiff", width = 10, height = 5, units = "in", res = 300)
ggplot(data = map_new_el) +
  geom_sf(fill = "grey50", color = "black", show.legend = FALSE) +
  geom_sf(data = republika, fill = NA, color = "black", size = 1) +
  geom_sf(data = kraje, fill = NA, color = "black", size = 1) +
  theme_void()
dev.off()

# timing of new elections
data <- data %>%
  mutate(DATE = ymd(DATUMVOLEB)) %>%
  arrange(KODZASTUP, DATE)
data_with_next <- data[data$TYPZASTUP==1 & data$POCOBYV<=2000,] %>%
  group_by(KODZASTUP) %>%
  mutate(
    next_DATE = lead(DATE),
    next_new_el_flag = lead(next_new_el),
    next_term = lead(term)  # Get the term of the next election
  ) %>%
  ungroup()
result <- data_with_next %>%
  filter(next_new_el == 1) %>%
  mutate(
    days_between = as.integer(next_DATE - DATE)
  ) %>%
  select(KODZASTUP, DATE, next_DATE, days_between, term, next_term)
result <- result[!is.na(result$days_between),]
mean(result$days_between) # average number of days between normal and new elections
ggplot(result, aes(x = days_between)) +
  geom_histogram(binwidth = 50, fill = "black", alpha = 0.5, origin = 0) +  
  labs(title = "", x = "Days After Previous Election", y = "New Elections") +
  scale_y_continuous(labels = scales::comma, breaks = seq(0, 200, by = 50)) +
  theme_minimal(base_size = 12) +  
  theme(panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
    axis.text = element_text(size = 10)) +
  coord_cartesian(xlim = c(0, max(result$days_between))) +  
  scale_x_continuous(breaks = seq(0, max(result$days_between), by = 200),labels = scales::comma)
ggsave("export/hist_new_el.jpeg", width = 10, height = 5, dpi = 300)

# municipalities with new election
new_elections <- data %>%
  group_by(MANDATY) %>%
  summarise(sum_next_new_el = sum(next_new_el, na.rm = TRUE))
length(unique(data$KODZASTUP[data$term==2022 & data$MANDATY==5])) # current municipalities with 5,6,7,8 councillors
length(unique(data$KODZASTUP[data$term==2022 & data$MANDATY==6]))
length(unique(data$KODZASTUP[data$term==2022 & data$MANDATY==7]))
length(unique(data$KODZASTUP[data$term==2022 & data$MANDATY==8]))
filtered_data <- data %>%  # municipalities with 0,1,2,3,4 councillors above new election as a reserve
  filter(DATUMVOLEB == 20220923, !is.na(turnout))
latest_elections <- filtered_data %>%
  group_by(KODZASTUP) %>%
  filter(DATUMVOLEB == max(DATUMVOLEB)) %>%
  ungroup()
municipality_count <- latest_elections %>%
  count(above_new_election)
new_elections_data <- data %>% # reaction to the new elections
  filter(new_el == 1)
reaction_new <- new_elections_data %>%
  group_by(KODZASTUP) %>%
  filter(DATUMVOLEB == min(DATUMVOLEB)) %>%
  select(KODZASTUP, DATUMVOLEB, MANDATY) %>%
  ungroup()
merged_data <- left_join(reaction_new, data, by = "KODZASTUP")
subsequent_elections <- merged_data %>%
  filter(DATUMVOLEB.y > DATUMVOLEB.x)
min_mandates_summary <- subsequent_elections %>%
  group_by(KODZASTUP) %>%
  summarize(
    min_mandates = min(MANDATY.y, na.rm = TRUE),
    max_mandates = max(MANDATY.y, na.rm = TRUE)
  ) %>%
  ungroup()
final_new <- left_join(reaction_new, min_mandates_summary, by = "KODZASTUP")
final_new$increased <- ifelse(final_new$MANDATY<final_new$max_mandates,1,0)
final_new$decreased <- ifelse(final_new$MANDATY>final_new$min_mandates,1,0)

table(filtered_data$above_new_election[filtered_data$TYPZASTUP==1])

# municipalities with additional election
lack_candidates <- data %>%
  group_by(MANDATY) %>%
  summarise(sum_next_lack_candidates = sum(next_lack_candidates, na.rm = TRUE))
lack_candidates_data <- data %>% # reaction to the lack of candidates
  filter(lack_candidates == 1)
reaction_lack <- lack_candidates_data %>%
  group_by(KODZASTUP) %>%
  filter(DATUMVOLEB == min(DATUMVOLEB)) %>%
  select(KODZASTUP, DATUMVOLEB, MANDATY) %>%
  ungroup()
merged_data <- left_join(reaction_lack, data, by = "KODZASTUP")
subsequent_elections <- merged_data %>%
  filter(DATUMVOLEB.y > DATUMVOLEB.x)
min_mandates_summary <- subsequent_elections %>%
  group_by(KODZASTUP) %>%
  summarize(
    min_mandates = min(MANDATY.y, na.rm = TRUE),
    max_mandates = max(MANDATY.y, na.rm = TRUE)
  ) %>%
  ungroup()
final_lack <- left_join(reaction_lack, min_mandates_summary, by = "KODZASTUP")
final_lack$increased <- ifelse(final_lack$MANDATY<final_lack$max_mandates,1,0)
final_lack$decreased <- ifelse(final_lack$MANDATY>final_lack$min_mandates,1,0)

# analytical dataset
data_muni <- data[data$TYPZASTUP==1 & !is.na(data$next_lack_candidates) & !is.na(data$councillors_age) & data$POCOBYV<=2000,] # preparation of dataset
data_muni$log_POCOBYV <- log(data_muni$POCOBYV,)



test <- data[data$TYPZASTUP==1 & data$POCOBYV<=2000,]


# descriptive table
summary_table <- data_muni %>%
  summarise(
    across(
      c(next_lack_candidates, next_new_el, POCOBYV, MANDATY, previous_lack, previous_new, turnout_time, plurality_index,
        novices, candidates_age, candidates_uni_edu, candidates_female,
        incumbents, councillors_age, councillors_uni_edu, councillors_female),
      list(
        n = ~sum(!is.na(.)),
        min = ~min(., na.rm = TRUE),
        max = ~max(., na.rm = TRUE),
        mean = ~mean(., na.rm = TRUE),
        sd = ~sd(., na.rm = TRUE)
      ),
      .names = "{.col}_{.fn}"
    )
  ) %>%
  pivot_longer(
    everything(),
    names_to = c("variable", "stat"),
    names_pattern = "^(.*)_(n|min|max|mean|sd)$",
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = stat,
    values_from = value
  )
summary_table <- summary_table %>%
  mutate(across(where(is.numeric), ~round(., 3)))
write_xlsx(summary_table, "export/variables.xlsx")

# models
hist(data_muni$log_POCOBYV, breaks = 100)
hist(data_muni$MANDATY)
hist(data_muni$turnout_time, breaks = 100)
hist(data_muni$plurality_index, breaks = 100)
hist(data_muni$novices, breaks = 100)
hist(data_muni$candidates_age, breaks = 100)
hist(data_muni$candidates_uni_edu, breaks = 100)
hist(data_muni$candidates_female, breaks = 100)
hist(data_muni$incumbents, breaks = 100)
hist(data_muni$councillors_age)
hist(data_muni$councillors_uni_edu, breaks = 100)
hist(data_muni$councillors_female, breaks = 100)

model_lack_candidates <- glm(next_lack_candidates ~ log_POCOBYV + MANDATY + previous_lack
                             + turnout_time 
                             + plurality_index + novices + candidates_age + candidates_uni_edu + candidates_female,
                             data = data_muni, family = binomial(link = "logit"))
summary(model_lack_candidates)
vif(model_lack_candidates)
cor(data_muni[,c("log_POCOBYV","MANDATY","previous_lack","turnout_time","plurality_index","novices","candidates_age","candidates_uni_edu","candidates_female")], use="complete.obs")

model_new_election <- glm(next_new_el ~ log_POCOBYV + MANDATY + previous_new
                          + turnout_time
                          + plurality_index + novices 
                          + incumbents + councillors_age + councillors_uni_edu + councillors_female,
                          data = data_muni, family = binomial(link = "logit"))
summary(model_new_election)
vif(model_new_election)
cor(data_muni[,c("log_POCOBYV","MANDATY","previous_new","turnout_time","plurality_index","novices","incumbents","councillors_age","councillors_uni_edu","councillors_female")], use="complete.obs")

 # ROC curves
roc_curve <- roc(data_muni$next_lack_candidates, fitted(model_lack_candidates))
plot(roc_curve, col="blue", main="ROC Curve")
auc(roc_curve)
roc_curve <- roc(data_muni$next_new_el, fitted(model_new_election))
plot(roc_curve, col="blue", main="ROC Curve")
auc(roc_curve)

# odds ratios
plot_model(model_lack_candidates, type = "est", show.values = TRUE, show.p = TRUE, transform = "exp")
plot_model(model_new_election, type = "est", show.values = TRUE, show.p = TRUE, transform = "exp")

# regression tables
stargazer(model_lack_candidates, model_new_election, type = "html", out = "export/tables.html")

# interpretation - lack of candidates
pred_lack_previous <- ggpredict(model_lack_candidates, ci_level = 0.95, terms = "previous_lack[0:1]",
                                condition = list(MANDATY = 7, turnout_time = 1))

pred_lack_mandates <- ggpredict(model_lack_candidates, ci_level = 0.95, terms = "MANDATY[5:15]",
                                condition = list(previous_lack = 0, turnout_time = 1))
ggplot(pred_lack_mandates, aes(x = x, y = predicted)) +
  geom_point(size = 1.5) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.15) +
  labs(title = "", x = "Mandates", y = "Predicted Probability of Lack of Candidates (%)") +
  scale_y_continuous(
    labels = function(x) scales::percent(x, accuracy = 0.1) %>% gsub("%", "", .),
    breaks = seq(0, 0.11, by = 0.001)) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
    axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.006)) + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10))
ggsave("export/lack_mandates.jpeg", width = 10, height = 4, dpi = 300)

pred_lack_plurality <- ggpredict(model_lack_candidates, ci_level = 0.95, terms = "plurality_index[0:10]",
                                 condition = list(previous_lack = 0, turnout_time = 1, MANDATY = 7))
plot_pred_lack_plurality <- plot(pred_lack_plurality) + 
  labs(title = "a)", x = "Plurality Index", y = "Predicted Probability of Lack of Candidates (%)") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 0.1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.01, by = 0.001)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.006))
pred_lack_candidates_edu <- ggpredict(model_lack_candidates, ci_level = 0.95, terms = "candidates_uni_edu[0:100]",
                                      condition = list(previous_lack = 0, turnout_time = 1, MANDATY = 7))
plot_pred_lack_candidates_edu <- plot(pred_lack_candidates_edu) + 
  labs(title = "b)", x = "University Educated Candidates (%)", y = "") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 0.1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.01, by = 0.001)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.006))
combined_plot <- grid.arrange(plot_pred_lack_plurality,plot_pred_lack_candidates_edu,ncol = 2)
ggsave("export/lack_plurality_edu.jpeg",
       plot = combined_plot,
       width = 12, height = 5, dpi = 300)

# interpretation - new election
pred_new_previous <- ggpredict(model_new_election, ci_level = 0.99, terms = "previous_new[0:1]",
                               condition = list(MANDATY = 7, turnout_time = 1))

pred_new_mandates <- ggpredict(model_new_election, ci_level = 0.99, terms = "MANDATY[5:15]",
                               condition = list(previous_new = 0, turnout_time = 1))
ggplot(pred_new_mandates, aes(x = x, y = predicted)) +
  geom_point(size = 1.5) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.15) +
  labs(title = "", x = "Mandates", y = "Predicted Probability of New Election (%)") +
  scale_y_continuous(
    labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
    breaks = seq(0, 0.11, by = 0.02)) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
    axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.11)) + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10))
ggsave("export/new_mandates.jpeg", width = 10, height = 4, dpi = 300)

pred_new_plurality <- ggpredict(model_new_election, ci_level = 0.99, terms = "plurality_index[0:10]",
                                condition = list(previous_new = 0, turnout_time = 1, MANDATY = 7))
plot_pred_new_plurality <- plot(pred_new_plurality) + 
  labs(title = "a)", x = "Plurality Index", y = "Predicted Probability of New Election (%)") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.2, by = 0.02)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.2))
pred_new_turnout <- ggpredict(model_new_election, ci_level = 0.99, terms = "turnout_time[0:2]",
                              condition = list(previous_new = 0, MANDATY = 7))
plot_pred_new_turnout <- plot(pred_new_turnout) + 
  labs(title = "b)", x = "Turnout Deviation (%)", y = "") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.2, by = 0.02)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.2))
combined_plot <- grid.arrange(plot_pred_new_plurality,plot_pred_new_turnout,ncol = 2)
ggsave("export/new_plurality_turnout.jpeg",
       plot = combined_plot,
       width = 12, height = 5, dpi = 300)

pred_new_incumbents <- ggpredict(model_new_election, ci_level = 0.99, terms = "incumbents[0:100]",
                                 condition = list(previous_new = 0, turnout_time = 1, MANDATY = 7))
plot_pred_new_incumbents <- plot(pred_new_incumbents) + 
  labs(title = "b)", x = "Incumbents Share (%)", y = "") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.1, by = 0.01)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.08))
pred_new_age <- ggpredict(model_new_election, ci_level = 0.99, terms = "councillors_age[25:65]",
                          condition = list(previous_new = 0, turnout_time = 1, MANDATY = 7))
plot_pred_new_age <- plot(pred_new_age) + 
  labs(title = "a)", x = "Councillors' Average Age (%)", y = "Predicted Probability of New Election (%)") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.1, by = 0.01)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.08))

pred_new_edu <- ggpredict(model_new_election, ci_level = 0.99, terms = "councillors_uni_edu[0:100]",
                          condition = list(previous_new = 0, turnout_time = 1, MANDATY = 7))
plot_pred_new_edu <- plot(pred_new_edu) + 
  labs(title = "a)", x = "University Educated Councillors (%)", y = "Predicted Probability of New Election (%)") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.1, by = 0.01)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.04))
pred_new_female <- ggpredict(model_new_election, ci_level = 0.99, terms = "councillors_female[0:100]",
                             condition = list(previous_new = 0, turnout_time = 1, MANDATY = 7))
plot_pred_new_female <- plot(pred_new_female) + 
  labs(title = "b)", x = "Female Councillors (%)", y = "") +
  scale_y_continuous(labels = function(x) scales::percent(x, accuracy = 1) %>% gsub("%", "", .),
                     breaks = seq(0, 0.1, by = 0.01)) +
  theme_minimal(base_size = 12) +
  theme(panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(),
        panel.grid.major = element_line(color = "gray80", linetype = "dotted", size = 0.5),
        axis.text = element_text(size = 10)) +
  coord_cartesian(ylim = c(0, 0.04))
combined_plot <- grid.arrange(plot_pred_new_age,plot_pred_new_incumbents,nrow = 1, ncol = 2)
ggsave("export/new_incumbents_age.jpeg",
       plot = combined_plot,
       width = 12, height = 5, dpi = 300)




